#!/usr/bin/python
# -*- coding: latin1 -*-

###########################
### Autor: Sebastian Enger / M.Sc.
### Copyright: Sebastian Enger
### Licence: Commercial / OneTipp
### Version: 1.0.4  - 12-10-2015@22:53 Uhr
### Contact: sebastian.enger@gmail.com
### OneTipp Text Tool in Python
###########################

######## export PYTHON_EGG_CACHE=/tmp

import os
import pprint
import nltk
#import rocksdb                                         # shared library kann aktuell noch nicht gelesen werden
import MySQLdb                                          # apt-get install python-mysqldb
from sphinxit.core.processor import Search              # http://sphinxit.readthedocs.org/en/latest/
from sphinxit.core.helpers import BaseSearchConfig
import random
import codecs
import sys
import unicodedata

# Set PYTHON_EGG_CACHE to /tmp for this process (same effect as the shell
# line "export PYTHON_EGG_CACHE=/tmp" mentioned in the header).
# NOTE(review): presumably needed so egg-packaged extensions can be unpacked
# into a writable directory; this runs after most third-party imports, so
# confirm it still has the intended effect.
os.environ['PYTHON_EGG_CACHE'] = '/tmp'


from nltk.tokenize import sent_tokenize
# The NLTK data files have to be installed beforehand, for example with:
#   python -m nltk.downloader -d /usr/share/nltk_data all
#   python -m nltk.downloader all
#   nltk.download()

# Python 2 only: reload(sys) restores sys.setdefaultencoding(), which is
# removed from the module at interpreter start-up. The default codec used for
# implicit str <-> unicode conversions is then switched to Latin-1.
reload(sys)
sys.setdefaultencoding('latin-1')

class SphinxitConfig(BaseSearchConfig):
    """Settings handed to sphinxit ``Search`` objects via ``config=``.

    The search daemon is addressed on the local host, port 9977.
    """

    # Connection to searchd.
    SEARCHD_CONNECTION = dict(host='127.0.0.1', port=9977)
    POOL_SIZE = 5
    # SQL_ENGINE = 'oursql'   (alternative engine, left disabled)

    # Debugging switched off.
    # NOTE(review): WITH_META / WITH_STATUS presumably control whether extra
    # meta/status information is attached to results -- confirm in sphinxit.
    DEBUG = False
    WITH_META = False
    WITH_STATUS = False

# Pretty-printer kept available for ad-hoc debugging output.
pp = pprint.PrettyPrinter(indent=4)

# Background reading:
#   http://pyrocksdb.readthedocs.org/en/v0.4/tutorial/index.html
#   https://github.com/sphinxsearch/sphinx/blob/master/api/sphinxapi.py
#   http://www.tutorialspoint.com/python/python_database_access.htm

# Both connections use the same host, account and schema; only the port
# differs, so the shared arguments are spelled out once.
_db_settings = {
    'host': '127.0.0.1',
    'user': 'root',
    'passwd': '###########99',
    'db': 'onetipp',
}

# SphinxQL endpoint, spoken to through the MySQL client library.
sphinx = MySQLdb.connect(port=9977, **_db_settings)
cursorSphinx = sphinx.cursor()

# Regular MySQL server.
mysql = MySQLdb.connect(port=3306, **_db_settings)
cursorMysql = mysql.cursor()

def deumlaut(s):
    """
    Spell out German umlauts and the sharp s as ASCII letter pairs.

    s: the string to transliterate.
    Returns the string with every character listed below replaced by its
    two-letter spelling; everything else is left untouched.
    """
    substitutions = (
        ('\xdf', 'ss'),  # sharp s
        ('\xfc', 'ue'),  # u umlaut
        ('\xdc', 'Ue'),  # U umlaut
        ('\xf6', 'oe'),  # o umlaut
        ('\xd6', 'Oe'),  # O umlaut
        ('\xe4', 'ae'),  # a umlaut
        ('\xc4', 'Ae'),  # A umlaut
    )
    for special, spelled_out in substitutions:
        s = s.replace(special, spelled_out)
    return s




# Tokens of this length or shorter are copied to the output untouched.
_MAX_UNCHECKED_LENGTH = 4

# HTML wrappers that highlight a substituted token in the output file:
# capitalised replacements are bold, lower-case replacements bold italic.
_MARKUP_CAPITALISED = '<b style="color:#00FF00;">%s</b>'
_MARKUP_LOWERCASE = '<b style="color:#00FF00;"><i>%s</i></b>'


def _to_text(value):
    """Decode a Latin-1 byte string; return anything else unchanged.

    NOTE(review): assumes the database hands out Latin-1 bytes, as the rest
    of this file does (deumlaut() is applied to them) -- confirm.
    """
    if isinstance(value, bytes):
        return value.decode('latin-1')
    return value


def _name_like_pattern(word):
    """Return a LIKE pattern matching values that start with literal *word*."""
    # Backslash first, so the escapes added for % and _ are not doubled.
    escaped = word.replace('\\', '\\\\')
    escaped = escaped.replace('%', '\\%').replace('_', '\\_')
    return escaped + '%'


def _is_known_name(cursor, word):
    """Return True if namen_table has an entry whose name starts with *word*."""
    # Parameterised: the token comes straight from the input file and must
    # not be spliced into the SQL text.
    cursor.execute(
        "SELECT * FROM (namen_table) WHERE name LIKE %s LIMIT 1;",
        (_name_like_pattern(word),))
    return cursor.fetchone() is not None


def _find_synonyms(cursor, word):
    """Return the synonym candidates stored for *word* (possibly empty).

    The Sphinx index 'onetipp_syn_simple' is searched for the best match;
    the first value of the first hit is then used as ``uid`` to fetch the
    ';'-separated ``synonyms`` column from synonym_unique_simple.
    """
    query = Search(indexes=['onetipp_syn_simple'], config=SphinxitConfig)
    query = query.match(word).options(
        ranker='proximity_bm25',
        max_matches=1,
        max_query_time=350,
        field_weights={'synonyms': 100},
    )
    result = query.ask()
    try:
        syn_id = list(result['result']['items'][0].values())[0]
    except IndexError:
        # No hit in the synonym index: nothing to substitute.
        return []
    if not syn_id > 0:
        return []

    cursor.execute(
        "SELECT synonyms FROM (synonym_unique_simple) WHERE uid= %s",
        (syn_id,))
    row = cursor.fetchone()
    if not row or not row[0]:
        return []
    return row[0].split(";")


def _markup_for(word, synonyms):
    """Return the highlighted replacement for *word*, or None.

    The first non-empty synonym whose initial letter has the same case as
    the initial letter of *word* wins; it is passed through deumlaut() and
    wrapped in the matching HTML markup.
    """
    first = word[0:1]
    for candidate in synonyms:
        if not candidate:
            continue
        head = _to_text(candidate[0:1])
        if head.isupper() and first.isupper():
            return _MARKUP_CAPITALISED % _to_text(deumlaut(candidate))
        if head.islower() and first.islower():
            return _MARKUP_LOWERCASE % _to_text(deumlaut(candidate))
    return None


def _replacement_for(cursor, word):
    """Return the markup replacing *word*, or None to keep it as it is."""
    # Step 1: entries of the name database are write-protected.
    if _is_known_name(cursor, word):
        return None
    # Step 2: otherwise try to substitute a synonym.
    return _markup_for(word, _find_synonyms(cursor, word))


def main(argv):
    """Substitute synonyms in a text file and write the marked-up result.

    argv: command line as in sys.argv -- argv[1] is the Latin-1 encoded
          input file, argv[2] the output file to create.
    Returns the process exit status: 0 on success, 2 on wrong usage.
    """
    if len(argv) < 3:
        sys.stderr.write("Usage: %s INPUTFILE OUTPUTFILE\n" % argv[0])
        return 2
    inputfile, outputfile = argv[1], argv[2]

    # Decode once while reading; the matching encode happens on writing.
    with codecs.open(inputfile, 'r', encoding='latin-1') as infile:
        text = infile.read()

    tokens = nltk.word_tokenize(text)

    # Remember the outcome per distinct word so repeated words cost only
    # one round of database look-ups.
    outcomes = {}
    try:
        for index, word in enumerate(tokens):
            if len(word) <= _MAX_UNCHECKED_LENGTH:
                continue
            if word not in outcomes:
                outcomes[word] = _replacement_for(cursorMysql, word)
            if outcomes[word] is not None:
                tokens[index] = outcomes[word]
    finally:
        # Release both module-level connections even if a look-up failed.
        mysql.close()
        sphinx.close()

    with codecs.open(outputfile, 'w', encoding='latin-1') as outfile:
        outfile.write(' '.join(tokens))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))